####################################################################################
### Political leadership has limited impact on fossil fuel taxes and subsidies   ###
### Figures and tables in the main text                                          ###
### This version: October 29, 2022                                               ###
####################################################################################

#---------------------------------------------------#
# Preliminaries                                     #
#---------------------------------------------------#

rm(list = ls())

#List of packages for session
.packages = c("tidyverse","readstata13", "gridExtra", "lubridate", "estimatr", "texreg",
              "survival", "survminer")

#Install CRAN packages (if not already installed)
.inst = .packages %in% installed.packages()
if(length(.packages[!.inst]) > 0) install.packages(.packages[!.inst])

#Loading packages into session 
lapply(.packages, require, character.only=TRUE)

# Set Working Directory to wherever files are downloaded
#setwd("")

#---------------------------------------------------#
# Loading Data                                      #
#---------------------------------------------------#

#Main datasets for regression analysis
load("ffs-leaders-2022-10-29.RData")

#Output from the simulations of null distributions
load("complete_null_distributions_oct22.RData")

#Dataset with changes and covariates
leaders_taxchange <- read.csv("leaders_change_newtenure.csv")

###############################################
############ Main Text -- Figure 1 ############                     
###############################################

#Figure 1. Gasoline prices in real USD per liter in Iran (Top) and Nigeria (Bottom), 1990–2015. Price lines are shaded and colored to distinguish different leader tenures in each country.

psubs <- unique(censdat$country2[censdat$persistent == 1])

gasdat <- newdat %>%
  dplyr::select(-1) %>%
  mutate(date = as.Date(paste(year_month,"01",sep = "-"), format = "%Y-%m-%d")) %>%
  filter(country2 %in% psubs)

# Leader-spell start and end
irn <- gasdat %>% filter(country2=="IRN", leader_entry + leader_exit == 1)
irn <- rbind(irn, gasdat[gasdat$leader_id=="Rouhani-IRN"&gasdat$date=="2015-11-01",])

nga <- gasdat %>% filter(country2=="NGA", leader_entry + leader_exit == 1)
nga <- rbind(nga, gasdat[gasdat$leader_id=="Goodluck Jonathan-NGA"&gasdat$date=="2015-03-01",])

# Fixing Yar'Adua so his price is same as last month of Obasanjo
nga$price_usd_2015[nga$leader_id=="Yar'Adua-NGA"][1] <- nga$price_usd_2015[nga$leader_id=="Obasanjo-NGA"][2]
gasdat$price_usd_2015[gasdat$leader_id=="Yar'Adua-NGA"][1] <- nga$price_usd_2015[nga$leader_id=="Obasanjo-NGA"][2]

nga %>% select(leader, date, price_usd_2015)


ggplot() +
  geom_rect(aes(xmin=ymd('1990-01-01'),
                xmax = ymd('1997-08-01'),
                ymin = -Inf,
                ymax = Inf), fill = 'grey', alpha = 0.5) +
  geom_rect(aes(xmin=ymd('2005-09-01'),
                xmax = ymd('2013-08-01'),
                ymin = -Inf,
                ymax = Inf), fill = 'grey', alpha = 0.5) +
  geom_point(data = irn,
             aes(x = date, y = price_usd_2015, col = factor(leader)), 
             size = 3) +
  geom_line(data = gasdat %>% filter(country2=="IRN"),
            aes(x = date, y = price_usd_2015, col = factor(leader)), 
            size = 1.5) +
  # geom_line(data = irn,
  #           aes(x = date, y = price_usd_2015, group = leader), 
  #           size = 1, linetype = "dashed") +
  annotate("text", x = as.Date("1993-10-12"), y = 1.45, label = "Rafsanjani", size = 4) +
  annotate("text", x = as.Date("2001-06-01"), y = 1.35, label = "Khatami", size = 4) +
  annotate("text", x = as.Date("2009-09-01"), y = 1.45, label = "Ahmadinejad", size = 4) +
  annotate("text", x = as.Date("2015-02-01"), y = 1.35, label = "Rouhani", size = 4) +
  scale_x_date(date_breaks = "24 months", date_labels = "%Y", expand = c(0, 0)) +
  scale_y_continuous(breaks = seq(0,1.5,0.25), labels=scales::dollar_format()) + 
  scale_color_manual(values = c("#08519C","forestgreen","#08519C","forestgreen")) +
  # scale_colour_brewer(palette = "Paired") +
  # scale_colour_brewer(palette = "Set1") +
  labs(x = "", 
       y = "") +
  # ggtitle("Gasoline prices in Iran, by leader-tenure", subtitle = "Real 2015 USD / liter") +
  coord_cartesian(ylim = c(0,1.5),xlim=c(as.Date("1990-01-01"),as.Date("2016-06-15"))) +
  theme(legend.position = "none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=13), 
        axis.title.x = element_text(size=18), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.y = element_text(size=16), 
        axis.title.y = element_text(size=18))

# Nigeria

ggplot() +
  geom_rect(aes(xmin=ymd('1993-08-01'),
                xmax = ymd('1993-12-01'),
                ymin = -Inf,
                ymax = Inf), fill = 'grey', alpha = 0.5) +
  geom_rect(aes(xmin=ymd('1998-06-01'),
                xmax = ymd('1999-06-01'),
                ymin = -Inf,
                ymax = Inf), fill = 'grey', alpha = 0.5) +
  geom_rect(aes(xmin=ymd('2007-05-01'),
                xmax = ymd('2010-03-01'),
                ymin = -Inf,
                ymax = Inf), fill = 'grey', alpha = 0.5) +
  geom_point(data = nga,
             aes(x = date, y = price_usd_2015, col = factor(leader)),
             size = 3) +
  geom_line(data = gasdat %>% filter(country2=="NGA"),
            aes(x = date, y = price_usd_2015, col = factor(leader)), 
            size = 1.5) +
  # geom_line(data = nga,
  #           aes(x = date, y = price_usd_2015, group = leader), 
  #           size = 1, linetype = "dashed") +
  annotate("text", x = as.Date("1991-10-17"), y = 1.45, label = "Babangida", size = 4) +
  annotate("text", x = as.Date("1993-10-01"), y = 1.35, label = "Shonekan", size = 4) +
  annotate("text", x = as.Date("1996-03-01"), y = 1.45, label = "Abacha", size = 4) +
  annotate("text", x = as.Date("1998-11-30"), y = 1.35, label = "Abubakar", size = 4) +
  annotate("text", x = as.Date("2003-05-16"), y = 1.45, label = "Obasanjo", size = 4) +
  annotate("text", x = as.Date("2008-10-01"), y = 1.35, label = "Yar'Adua", size = 4) +
  annotate("text", x = as.Date("2012-09-01"), y = 1.45, label = "Jonathan", size = 4) +
  scale_x_date(date_breaks = "24 months", date_labels = "%Y", expand = c(0, 0)) +
  scale_y_continuous(breaks = seq(0,1.5,0.25), labels=scales::dollar_format()) + 
  scale_color_manual(values = c("forestgreen","#08519C","forestgreen","forestgreen",
                                "#08519C","forestgreen","#08519C","#08519C")) +
  labs(x = "", 
       y = "") +
  # ggtitle("Gasoline prices in Nigeria, by leader-tenure", subtitle = "Real 2015 USD / liter") +
  coord_cartesian(ylim = c(0,1.5),xlim=c(as.Date("1990-01-01"),as.Date("2016-06-15"))) +
  theme(legend.position = "none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=13), 
        axis.title.x = element_text(size=18), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.y = element_text(size=16), 
        axis.title.y = element_text(size=18))

###############################################
############ Main Text -- Figure 2 ############                     
###############################################

#Fig. 2. Each plot shows the distribution of R2 values from 1,000 estimates with differently permuted orderings of leader tenures used to produce LFEs. 

#**Note: the final figure will show as a PDF in the folder where the files are located

# --------------------------------------------------------------------------- #
# Models for each group
# --------------------------------------------------------------------------- #

mod_all <- lm(bmgap2015adj ~ factor(country2) + obsid*time_trend, data = data_try)

mod_dem <- lm(bmgap2015adj ~ factor(country2) + obsid*time_trend, data = democs)

mod_aut <- lm(bmgap2015adj ~ factor(country2) + obsid*time_trend, data = autocs)

mod_export <- lm(bmgap2015adj ~ factor(country2) + obsid*time_trend, data = exporters)

mod_import <- lm(bmgap2015adj ~ factor(country2) + obsid*time_trend, data = importers)

mod_perssub <- lm(bmgap2015adj ~ factor(country2) + obsid*time_trend, data = persistentsubs)

mod_perstax <- lm(bmgap2015adj ~ factor(country2) + obsid*time_trend, data = fixednonpersistent)

mod_pres <- lm(bmgap2015adj ~ factor(country2) + obsid*time_trend, data = presid)

mod_parl <- lm(bmgap2015adj ~ factor(country2) + obsid*time_trend, data = parl)

# --------------------------------------------------------------------------- #
# Plot for all country
# --------------------------------------------------------------------------- #

nums <- nrow(dat)
pval <- sum(1*(summary(mod_all)$r.squared < dat$r.squared))/nums

dens <- density(dat$r.squared)

sata <- tibble(x = dens$x, y = dens$y) %>% 
  mutate(variable = case_when(
    (x >= summary(mod_all)$r.squared) ~ "On",
    (x < summary(mod_all)$r.squared) ~ "Off",
    TRUE ~ NA_character_))

plot_all <- ggplot() +
  geom_histogram(data = dat, aes(x = r.squared, y = ..density..), bins = 50) +
  geom_density(data = dat, aes(x = r.squared)) +
  geom_area(data = filter(sata, variable == 'On'), aes(x, y), 
            fill = 'red', alpha = 0.2) + 
  geom_area(data = filter(sata, variable == 'Off'), aes(x, y), 
            fill = '#5CBD92FF', alpha = 0.5) +
  ggtitle("All countries", subtitle = paste("p = ",round(pval,3))) +
  geom_vline(xintercept = summary(mod_all)$r.squared, col = "blue", size = 1.2, linetype = "solid") +
  scale_x_continuous(breaks = c(min(dat$r.squared),
                                min(dat$r.squared)+diff(range(dat$r.squared))/3,
                                min(dat$r.squared)+2*diff(range(dat$r.squared))/3,
                                max(dat$r.squared)),
                     labels = round(c(min(dat$r.squared),
                                      min(dat$r.squared)+diff(range(dat$r.squared))/3,
                                      min(dat$r.squared)+2*diff(range(dat$r.squared))/3,
                                      max(dat$r.squared)),3)) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=16), 
        axis.title.x = element_text(size=18), 
        axis.line.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.title.y = element_text(size=18),
        axis.ticks.y = element_blank())

# --------------------------------------------------------------------------- #
# Plot for democracies
# --------------------------------------------------------------------------- #

pval <- sum(1*(summary(mod_dem)$r.squared < dat_democs$r.squared))/nums

dens <- density(dat_democs$r.squared)

sata <- tibble(x = dens$x, y = dens$y) %>% 
  mutate(variable = case_when(
    (x >= summary(mod_dem)$r.squared) ~ "On",
    (x < summary(mod_dem)$r.squared) ~ "Off",
    TRUE ~ NA_character_))

plot_democ <- ggplot() +
  geom_histogram(data = dat_democs, aes(x = r.squared, y = ..density..), bins = 50) +
  geom_density(data = dat_democs, aes(x = r.squared)) +
  geom_area(data = filter(sata, variable == 'On'), aes(x, y), 
            fill = 'red', alpha = 0.2) + 
  geom_area(data = filter(sata, variable == 'Off'), aes(x, y), 
            fill = '#5CBD92FF', alpha = 0.5) +
  ggtitle("Democracies", subtitle = paste("p = ",round(pval,3))) +
  geom_vline(xintercept = summary(mod_dem)$r.squared, col = "blue", size = 1.2, linetype = "solid") +
  scale_x_continuous(breaks = c(min(dat_democs$r.squared),
                                max(dat_democs$r.squared)),
                     labels = round(c(min(dat_democs$r.squared),
                                      max(dat_democs$r.squared)),3)) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=16), 
        axis.title.x = element_text(size=18), 
        axis.line.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.title.y = element_text(size=18),
        axis.ticks.y = element_blank())

# --------------------------------------------------------------------------- #
# Plot for autocracies
# --------------------------------------------------------------------------- #

pval <- sum(1*(summary(mod_aut)$r.squared < dat_autocs$r.squared))/nums

dens <- density(dat_autocs$r.squared)

sata <- tibble(x = dens$x, y = dens$y) %>% 
  mutate(variable = case_when(
    (x >= summary(mod_aut)$r.squared) ~ "On",
    (x < summary(mod_aut)$r.squared) ~ "Off",
    TRUE ~ NA_character_))

plot_autoc <- ggplot() +
  geom_histogram(data = dat_autocs, aes(x = r.squared, y = ..density..), bins = 50) +
  geom_density(data = dat_autocs, aes(x = r.squared)) +
  geom_area(data = filter(sata, variable == 'On'), aes(x, y), 
            fill = 'red', alpha = 0.2) + 
  geom_area(data = filter(sata, variable == 'Off'), aes(x, y), 
            fill = '#5CBD92FF', alpha = 0.5) +
  ggtitle("Autocracies", subtitle = paste("p = ",round(pval,3))) +
  geom_vline(xintercept = summary(mod_aut)$r.squared, col = "blue", size = 1.2, linetype = "solid") +
  scale_x_continuous(breaks = c(min(dat_autocs$r.squared),
                                max(dat_autocs$r.squared)),
                     labels = round(c(min(dat_autocs$r.squared),
                                      max(dat_autocs$r.squared)),3)) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=16), 
        axis.title.x = element_text(size=18), 
        axis.line.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.title.y = element_text(size=18),
        axis.ticks.y = element_blank())

# --------------------------------------------------------------------------- #
# Plot for Oil exporters
# --------------------------------------------------------------------------- #

pval <- sum(1*(summary(mod_export)$r.squared < dat_exports$r.squared))/nums
dens <- density(dat_exports$r.squared)

sata <- tibble(x = dens$x, y = dens$y) %>% 
  mutate(variable = case_when(
    (x >= summary(mod_export)$r.squared) ~ "On",
    (x < summary(mod_export)$r.squared) ~ "Off",
    TRUE ~ NA_character_))

plot_export <- ggplot() +
  geom_histogram(data = dat_exports, aes(x = r.squared, y = ..density..), bins = 50) +
  geom_density(data = dat_exports, aes(x = r.squared)) +
  geom_area(data = filter(sata, variable == 'On'), aes(x, y), 
            fill = 'red', alpha = 0.2) + 
  geom_area(data = filter(sata, variable == 'Off'), aes(x, y), 
            fill = '#5CBD92FF', alpha = 0.5) +
  ggtitle("Oil Exporters", subtitle = paste("p = ",round(pval,3))) +
  geom_vline(xintercept = summary(mod_export)$r.squared, col = "blue", size = 1.2, linetype = "solid") +
  scale_x_continuous(breaks = c(min(dat_exports$r.squared),
                                max(dat_exports$r.squared)),
                     labels = round(c(min(dat_exports$r.squared),
                                      max(dat_exports$r.squared)),3)) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=16), 
        axis.title.x = element_text(size=18), 
        axis.line.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.title.y = element_text(size=18),
        axis.ticks.y = element_blank())

# --------------------------------------------------------------------------- #
# Plot for Oil importers
# --------------------------------------------------------------------------- #

pval <- sum(1*(summary(mod_import)$r.squared < dat_imports$r.squared))/nums
dens <- density(dat_imports$r.squared)

sata <- tibble(x = dens$x, y = dens$y) %>% 
  mutate(variable = case_when(
    (x >= summary(mod_import)$r.squared) ~ "On",
    (x < summary(mod_import)$r.squared) ~ "Off",
    TRUE ~ NA_character_))

plot_import <- ggplot() +
  geom_histogram(data = dat_imports, aes(x = r.squared, y = ..density..), bins = 50) +
  geom_density(data = dat_imports, aes(x = r.squared)) +
  geom_area(data = filter(sata, variable == 'On'), aes(x, y), 
            fill = 'red', alpha = 0.2) + 
  geom_area(data = filter(sata, variable == 'Off'), aes(x, y), 
            fill = '#5CBD92FF', alpha = 0.5) +
  ggtitle("Oil Importers", subtitle = paste("p = ",round(pval,3))) +
  geom_vline(xintercept = summary(mod_import)$r.squared, col = "blue", size = 1.2, linetype = "solid") +
  scale_x_continuous(breaks = c(min(dat_imports$r.squared),
                                max(dat_imports$r.squared)),
                     labels = round(c(min(dat_imports$r.squared),
                                      max(dat_imports$r.squared)),3)) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=16), 
        axis.title.x = element_text(size=18), 
        axis.line.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.title.y = element_text(size=18),
        axis.ticks.y = element_blank())

# --------------------------------------------------------------------------- #
# Plot for persistent subsidizers
# --------------------------------------------------------------------------- #

pval <- sum(1*(summary(mod_perssub)$r.squared < dat_ps$r.squared))/nums
dens <- density(dat_ps$r.squared)

sata <- tibble(x = dens$x, y = dens$y) %>% 
  mutate(variable = case_when(
    (x >= summary(mod_perssub)$r.squared) ~ "On",
    (x < summary(mod_perssub)$r.squared) ~ "Off",
    TRUE ~ NA_character_))

plot_perssub <- ggplot() +
  geom_histogram(data = dat_ps, aes(x = r.squared, y = ..density..), bins = 50) +
  geom_density(data = dat_ps, aes(x = r.squared)) +
  geom_area(data = filter(sata, variable == 'On'), aes(x, y), 
            fill = 'red', alpha = 0.2) + 
  geom_area(data = filter(sata, variable == 'Off'), aes(x, y), 
            fill = '#5CBD92FF', alpha = 0.5) +
  ggtitle("Persistent Subsidizers", subtitle = paste("p = ",round(pval,3))) +
  geom_vline(xintercept = summary(mod_perssub)$r.squared, col = "blue", size = 1.2, linetype = "solid") +
  scale_x_continuous(breaks = c(min(dat_ps$r.squared),
                                max(dat_ps$r.squared)),
                     labels = round(c(min(dat_ps$r.squared),
                                      max(dat_ps$r.squared)),3)) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=16), 
        axis.title.x = element_text(size=18), 
        axis.line.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.title.y = element_text(size=18),
        axis.ticks.y = element_blank())

# --------------------------------------------------------------------------- #
# Plot for persistent taxers
# --------------------------------------------------------------------------- #

pval <- sum(1*(summary(mod_perstax)$r.squared < dat_fnps$r.squared))/nums
dens <- density(dat_fnps$r.squared)

sata <- tibble(x = dens$x, y = dens$y) %>% 
  mutate(variable = case_when(
    (x >= summary(mod_perstax)$r.squared) ~ "On",
    (x < summary(mod_perstax)$r.squared) ~ "Off",
    TRUE ~ NA_character_))


plot_perstax <- ggplot() +
  geom_histogram(data = dat_fnps, aes(x = r.squared, y = ..density..), bins = 50) +
  geom_density(data = dat_fnps, aes(x = r.squared)) +
  geom_area(data = filter(sata, variable == 'On'), aes(x, y), 
            fill = 'red', alpha = 0.2) + 
  geom_area(data = filter(sata, variable == 'Off'), aes(x, y), 
            fill = '#5CBD92FF', alpha = 0.5) +
  ggtitle("Persistent Taxers", subtitle = paste("p = ",round(pval,3))) +
  geom_vline(xintercept = summary(mod_perstax)$r.squared, col = "blue", size = 1.2, linetype = "solid") +
  scale_x_continuous(breaks = c(min(dat_fnps$r.squared),
                                max(dat_fnps$r.squared)),
                     labels = round(c(min(dat_fnps$r.squared),
                                      max(dat_fnps$r.squared)),3)) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=16), 
        axis.title.x = element_text(size=18), 
        axis.line.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.title.y = element_text(size=18),
        axis.ticks.y = element_blank())

# --------------------------------------------------------------------------- #
# Plot for presidential
# --------------------------------------------------------------------------- #

pval <- sum(1*(summary(mod_pres)$r.squared < dat_presid$r.squared))/nums
dens <- density(dat_presid$r.squared)

sata <- tibble(x = dens$x, y = dens$y) %>% 
  mutate(variable = case_when(
    (x >= summary(mod_pres)$r.squared) ~ "On",
    (x < summary(mod_pres)$r.squared) ~ "Off",
    TRUE ~ NA_character_))

plot_presid <- ggplot() +
  geom_histogram(data = dat_presid, aes(x = r.squared, y = ..density..), bins = 50) +
  geom_density(data = dat_presid, aes(x = r.squared)) +
  geom_area(data = filter(sata, variable == 'On'), aes(x, y), 
            fill = 'red', alpha = 0.2) + 
  geom_area(data = filter(sata, variable == 'Off'), aes(x, y), 
            fill = '#5CBD92FF', alpha = 0.5) +
  ggtitle("Presidential Democracies", subtitle = paste("p = ",round(pval,3))) +
  geom_vline(xintercept = summary(mod_pres)$r.squared, col = "blue", size = 1.2, linetype = "solid") +
  scale_x_continuous(breaks = c(min(dat_presid$r.squared),
                                max(dat_presid$r.squared)),
                     labels = round(c(min(dat_presid$r.squared),
                                      max(dat_presid$r.squared)),3)) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=16), 
        axis.title.x = element_text(size=18), 
        axis.line.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.title.y = element_text(size=18),
        axis.ticks.y = element_blank())

# --------------------------------------------------------------------------- #
# Plot for parliamentary
# --------------------------------------------------------------------------- #

pval <- sum(1*(summary(mod_parl)$r.squared < dat_parl$r.squared))/nums
dens <- density(dat_parl$r.squared)

sata <- tibble(x = dens$x, y = dens$y) %>% 
  mutate(variable = case_when(
    (x >= summary(mod_parl)$r.squared) ~ "On",
    (x < summary(mod_parl)$r.squared) ~ "Off",
    TRUE ~ NA_character_))

plot_parl <- ggplot() +
  geom_histogram(data = dat_parl, aes(x = r.squared, y = ..density..), bins = 50) +
  geom_density(data = dat_parl, aes(x = r.squared)) +
  geom_area(data = filter(sata, variable == 'On'), aes(x, y), 
            fill = 'red', alpha = 0.2) + 
  geom_area(data = filter(sata, variable == 'Off'), aes(x, y), 
            fill = '#5CBD92FF', alpha = 0.5) +
  ggtitle("Parliamentary Democracies", subtitle = paste("p = ",round(pval,3))) +
  geom_vline(xintercept = summary(mod_parl)$r.squared, col = "blue", size = 1.2, linetype = "solid") +
  scale_x_continuous(breaks = c(min(dat_parl$r.squared),
                                max(dat_parl$r.squared)),
                     labels = round(c(min(dat_parl$r.squared),
                                      max(dat_parl$r.squared)),3)) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=16), 
        axis.title.x = element_text(size=18), 
        axis.line.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.title.y = element_text(size=18),
        axis.ticks.y = element_blank())

# --------------------------------------------------------------------------- #
# FIGURE 1: All plots together
# --------------------------------------------------------------------------- #

pl = list(plot_all, plot_democ, plot_autoc, 
          plot_presid, plot_parl, plot_import, 
          plot_export, plot_perstax, plot_perssub)
margin = theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))

# grid.arrange(nrow = 3, grobs = lapply(pl, "+", margin)) 

lay <- rbind(c(1,1),
             c(1,1),
             c(2,3),
             c(4,5),
             c(6,7),
             c(8,9))

pdf("fig1-null-distributions-25jan22.pdf",width=10,height=16,paper="special")
grid.arrange(grobs = lapply(pl, "+", margin),
             layout_matrix = lay, nrow = 6)
dev.off()

#Note: figure will be saved in the selected folder.

###############################################
############ Main Text -- Figure 3 ############                     
###############################################

#Fig. 3. Distribution of leader average changes in fuel taxes, highlighting the UNEP Champions of the Earth.


#Leader individual covariates

leadership_covariates <- newdat %>%
  distinct(obsid, .keep_all = T) %>%
  dplyr::select(obsid, leader, leader_id, country2, gender, leader_age, unieducation, execrlc)

#Dataset with changes and covariates
leaders_taxchange <- leaders_taxchange %>%
  dplyr::select(obsid, leader, country2, tenure_months, tenure_months_effective,
                mean_tax_entry, mean_tax_exit, diff_tax, rate_change_tax_eff, 
                presidential_democracy, fixedprice, persistent, oil, democracy, 
                pct_crisis_tenure, avg_inflation)

leaders_taxchange_cov <- leaders_taxchange %>%
  left_join(leadership_covariates, by = c("obsid","leader", "country2")) %>%
  mutate(leader_age = case_when(leader_id == "Nouri Abusahmain-LBY" ~ 58,
                                TRUE ~ as.numeric(leader_age)),
         leader_age_cat = case_when(leader_age <= 50 ~ "50 and Younger",
                                    leader_age > 50 & leader_age <= 60 ~ "51 to 60",
                                    leader_age > 60 ~ "61 and Older"),
         democracy = case_when(democracy >= 0.5 ~ "Democracy",
                               TRUE ~ "Autocracy"),
         presidential_democracy = case_when(democracy == "Democracy" & presidential_democracy >= 0.5 ~ "Presidential",
                                            democracy == "Democracy" & presidential_democracy < 0.5 ~ "Parliamentary",
                                            TRUE ~ "Autocracy"),
         unieducation = case_when(unieducation == 1 ~ "College-Level Education",
                                  unieducation == 0 ~ "Non-College-Level Education"),
         ideological_orientation = case_when(execrlc == 1 ~ "Right",
                                             execrlc == 2 ~ "Center",
                                             execrlc == 3 ~ "Left",
                                             TRUE ~ "Non-Applicable"),
         persistent_subsidizer = case_when(persistent == 1 ~ "Persistent Subsidizer",
                                           TRUE ~ "Persistent Taxer"),
         coe = ifelse(leader %in% c("Narendra Modi", "Michelle Bachelet", "Paul Kagame",
                                    "Hasina Wazed", "Bambang Yudhoyono", "Elbegdorj", "Calderon",
                                    "Bharrat Jagdeo", "Helen Clark"), 1, 0),
         oil_exporter = case_when(oil > 0 ~ "Oil Exporter",
                                  TRUE ~ "Oil Importer"),
         pct_crisis_cat = if_else(pct_crisis_tenure >= 0.5, "Persistent Crisis", "Non-Persistent Crisis"))

leaders_taxchange_cov_ordered <- leaders_taxchange_cov %>%
  arrange(rate_change_tax_eff) %>% 
  mutate(orders = row_number())

# Splitting up Bachelet and Wazid
leaders_taxchange_cov_ordered$leader[leaders_taxchange_cov_ordered$leader=="Michelle Bachelet" & leaders_taxchange_cov_ordered$obsid=="CHL-2006"] <- "Michelle Bachelet I"
leaders_taxchange_cov_ordered$leader[leaders_taxchange_cov_ordered$leader=="Michelle Bachelet" & leaders_taxchange_cov_ordered$obsid=="CHL-2014"] <- "Michelle Bachelet II"

leaders_taxchange_cov_ordered$leader[leaders_taxchange_cov_ordered$leader=="Hasina Wazed" & leaders_taxchange_cov_ordered$obsid=="BNG-1996-2"] <- "Hasina Wazed I"
leaders_taxchange_cov_ordered$leader[leaders_taxchange_cov_ordered$leader=="Hasina Wazed" & leaders_taxchange_cov_ordered$obsid=="BNG-2009"] <- "Hasina Wazed II"

# Names with country (manually entering)
leaders_taxchange_cov_ordered$leader.cty <- leaders_taxchange_cov_ordered$leader
leaders_taxchange_cov_ordered$leader.cty[leaders_taxchange_cov_ordered$coe==1] <- 
  c("Bachelet - Chile (2nd term)",
    "Calderon - Mexico",
    "Bachelet - Chile (1st term)",
    "Yudhoyono - Indonesia",
    "Clark - New Zealand",
    "Wazed - Bangladesh (1st term)",
    "Kagame - Rwanda",
    "Elbegdorj - Mongolia",
    "Wazed - Bangladesh (2nd term)",
    "Jagdeo - Guyana",
    "Modi - India")

ggplot(leaders_taxchange_cov_ordered, 
       aes(x = rate_change_tax_eff, y = orders, 
           col = as.factor(coe), alpha = as.factor(coe))) +
  geom_point() +
  labs(x = "Average rate of change in fuel taxes, by leader term", y = "") +
  scale_color_manual(values = c("gray","black")) +
  scale_alpha_manual(values = c(0.2, 1)) +
  ggrepel::geom_label_repel(data = leaders_taxchange_cov_ordered %>% filter(coe==1), aes(x = rate_change_tax_eff, y = orders, label = leader.cty)) +
  scale_x_continuous(breaks = seq(-0.05,0.05,0.025)) +
  theme(legend.position = "none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=11), 
        axis.title.x = element_text(size=12), 
        axis.line.y = element_blank(),
        axis.text.y = element_blank(), 
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank())

# Adding nudges to fix positioning
leaders_taxchange_cov_ordered$nudgex[leaders_taxchange_cov_ordered$coe==1] <- 
  c(0.03, -0.02, 0.027, -0.025, 0.022, -0.032, 0.021, -0.023, 0.033, -0.02, 0.015)

leaders_taxchange_cov_ordered$nudgey[leaders_taxchange_cov_ordered$coe==1] <- 
  c(0, -10, 10, -10, -5, -14, 0, 0, 10, 0, 0)

leaders_taxchange_cov_ordered %>% filter(coe==1) %>% select(leader.cty, nudgex, nudgey)

ggplot(leaders_taxchange_cov_ordered, 
       aes(x = rate_change_tax_eff, y = orders, 
           col = as.factor(coe), alpha = as.factor(coe))) +
  geom_point() +
  labs(x = "Average rate of change in fuel taxes, by leader term", y = "") +
  scale_color_manual(values = c("gray","black")) +
  scale_alpha_manual(values = c(0.2, 1)) +
  geom_label(data = leaders_taxchange_cov_ordered %>% filter(coe==1), 
             aes(x = rate_change_tax_eff, y = orders, label = leader.cty),
             nudge_x = leaders_taxchange_cov_ordered$nudgex[leaders_taxchange_cov_ordered$coe==1], 
             nudge_y = leaders_taxchange_cov_ordered$nudgey[leaders_taxchange_cov_ordered$coe==1]) +
  scale_x_continuous(breaks = seq(-0.05,0.05,0.025)) +
  theme(legend.position = "none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size=22),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
        axis.text.x = element_text(size=11), 
        axis.title.x = element_text(size=12), 
        axis.line.y = element_blank(),
        axis.text.y = element_blank(), 
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank())

###############################################
############ Main Text -- Table 1 #############                     
###############################################

#Table 1: Variation explained by CFE and by the LFE models.

### All Countries

fit.cfe1.all <- lm_robust(bmgap2015adj ~ as.factor(country2) +
                            as.factor(country2)*country_month,
                          data=data_try,
                          clusters = country2)

#R squared = 0.8735

fit.lfe1.all <- lm_robust(bmgap2015adj ~ as.factor(obsid) +
                            as.factor(obsid)*time_trend, 
                          data=data_try,
                          clusters = country2)

#R squared = 0.9275

dat %>%
  filter(r.squared > 0.9275) %>%
  count() #178 / 1000 = 0.178

### Democracies

fit.cfe1.dem <- lm_robust(bmgap2015adj ~ as.factor(country2) +
                            as.factor(country2)*country_month,
                          data=democs,
                          clusters = country2)

#R squared = 0.8744

fit.lfe1.dem <- lm_robust(bmgap2015adj ~ as.factor(obsid) +
                            as.factor(obsid)*time_trend, 
                          data=democs,
                          clusters = country2)

#R squared = 0.9434

dat_democs %>%
  filter(r.squared > 0.9434) %>%
  count() #217 / 1000 = 0.217

### Autocracies

fit.cfe1.aut <- lm_robust(bmgap2015adj ~ as.factor(country2) +
                            as.factor(country2)*country_month,
                          data=autocs,
                          clusters = country2)

#R squared = 0.818

fit.lfe1.aut <- lm_robust(bmgap2015adj ~ as.factor(obsid) +
                            as.factor(obsid)*time_trend, 
                          data=autocs,
                          clusters = country2)

#R squared = 0.8618

dat_autocs %>%
  filter(r.squared > 0.8618) %>%
  count() #178 / 1000 = 0.178

### Presidential Democracies

fit.cfe1.pres <- lm_robust(bmgap2015adj ~ as.factor(country2) +
                             as.factor(country2)*country_month,
                           data=presid,
                           clusters = country2)

#R squared = 0.8032

fit.lfe1.pres <- lm_robust(bmgap2015adj ~ as.factor(obsid) +
                             as.factor(obsid)*time_trend, 
                           data=presid,
                           clusters = country2)

#R squared = 0.8947

dat_presid %>%
  filter(r.squared > 0.8947) %>%
  count() #2 / 1000 = 0.002

### Parliamentary Democracies

fit.cfe1.par <- lm_robust(bmgap2015adj ~ as.factor(country2) +
                            as.factor(country2)*country_month,
                          data = parl,
                          clusters = country2)

#R squared = 0.8543

fit.lfe1.par <- lm_robust(bmgap2015adj ~ as.factor(obsid) +
                            as.factor(obsid)*time_trend, 
                          data=parl,
                          clusters = country2)

#R squared = 0.944

dat_parl %>%
  filter(r.squared > 0.9437) %>%
  count() # 742 / 1000 = 0.742

### Oil Importers

fit.cfe1.imp <- lm_robust(bmgap2015adj ~ as.factor(country2) +
                            as.factor(country2)*country_month,
                          data=importers,
                          clusters = country2)

#R squared = 0.8477

fit.lfe1.imp <- lm_robust(bmgap2015adj ~ as.factor(obsid) +
                            as.factor(obsid)*time_trend, 
                          data=importers,
                          clusters = country2)

#R squared = 0.9087

dat_imports %>%
  filter(r.squared > 0.9087) %>%
  count() #236 / 1000 = 0.236

### Oil Exporters

fit.cfe1.exp <- lm_robust(bmgap2015adj ~ as.factor(country2) +
                            as.factor(country2)*country_month,
                          data=exporters,
                          clusters = country2)

#R squared = 0.945

fit.lfe1.exp <- lm_robust(bmgap2015adj ~ as.factor(obsid) +
                            as.factor(obsid)*time_trend, 
                          data=exporters,
                          clusters = country2)

#R squared = 0.964

dat_exports %>%
  filter(r.squared > 0.964) %>%
  count() #119 / 1000 = 0.119

### Persistent Taxers

fit.cfe1.tax <- lm_robust(bmgap2015adj ~ as.factor(country2) +
                            as.factor(country2)*country_month,
                          data=fixednonpersistent,
                          clusters = country2)

#R squared = 0.6759

fit.lfe1.tax <- lm_robust(bmgap2015adj ~ as.factor(obsid) +
                            as.factor(obsid)*time_trend, 
                          data=fixednonpersistent,
                          clusters = country2)

#R squared = 0.7228

dat_fnps %>%
  filter(r.squared > 0.7228) %>%
  count() #233/ 1000 = 0.233

### Persistent Subsidizers

fit.cfe1.sub <- lm_robust(bmgap2015adj ~ as.factor(country2) +
                            as.factor(country2)*country_month,
                          data=persistentsubs,
                          clusters = country2)

#R squared = 0.5899

fit.lfe1.sub <- lm_robust(bmgap2015adj ~ as.factor(obsid) +
                            as.factor(obsid)*time_trend, 
                          data=persistentsubs,
                          clusters = country2)

#R squared = 0.7936

dat_ps %>%
  filter(r.squared > 0.7936) %>%
  count() # 4 / 1000 = 0.004

##All countries, p.value = 0.178
##Democracies, p.value = 0.217
##Autocracies, p.value = 0.178
##Presidential Dem, p.value = 0.002
##Parliament Dem, p.value = 0.742
##Oil Importers, p.value = 0.236
##Oil Exporters, p.value = 0.119
##Persistent Tax, p.value = 0.233
##Persistent Subsidy, p.value = 0.004

###############################################
############ Main Text -- Table 2 #############                     
###############################################

#Table 2. Model results for all countries

## Creating the list of countries to be included in the sample

## Dropped countries:"AGO" "BLR" "CMR" "COG" "GMB" "GNQ" "KAZ" "LBR" "MMR" "OMN" "RWA" "SDN" "SWZ" "SYR" "TCD" "TJK" "UGA" "UZB" "ZWE"

oneleadercountry <- censdat %>% group_by(country2) %>% summarise(n_lead = n_distinct(obsid)) %>% filter(n_lead > 1)

## Creating new tenure length, leader trend, and country trend variables

censdat <- censdat %>%
  filter(country2 %in% oneleadercountry$country2) %>%
  group_by(country2) %>%
  mutate(time_trend_country = row_number()) %>%
  ungroup() %>%
  group_by(obsid) %>%
  mutate(tenure_months_new = n(),
         time_trend_leader = row_number())

# ----------------------------------------------------------------- #
# Part 1: All Countries
# ----------------------------------------------------------------- #

#For all models, column 1 has country intercepts and country trends,
#column 2 has country intercepts and leader traits
#column 3 has leader fixed effects

#Table 1.1 All Countries

fit.cfe1.all <- lm_robust(bmgap2015adj ~ loggdppc + I(loggdppc^2)
                          + NY.GDP.MKTP.KD.ZG + percent_gdp
                          + fuel_income_dependence + VAT.Rate
                          + as.factor(country2)
                          + as.factor(country2)*time_trend_country, 
                          data=censdat,
                          clusters = country2)

fit.lead.traits.all <- lm_robust(bmgap2015adj ~ loggdppc + I(loggdppc^2)
                                 + NY.GDP.MKTP.KD.ZG + percent_gdp
                                 + fuel_income_dependence + VAT.Rate
                                 + leader_age + gender + unieducation + execrlc
                                 + as.factor(country2)
                                 + as.factor(country2)*time_trend_country,
                                 data=censdat,
                                 clusters = country2)

fit.lfe1.all <- lm_robust(bmgap2015adj ~ loggdppc + I(loggdppc^2)
                          + NY.GDP.MKTP.KD.ZG + percent_gdp
                          + fuel_income_dependence + VAT.Rate
                          + as.factor(obsid)
                          + as.factor(obsid)*time_trend_leader, 
                          data=censdat,
                          clusters = country2)

fit.crisis1.all <- lm_robust(bmgap2015adj ~ loggdppc + I(loggdppc^2)
                             + NY.GDP.MKTP.KD.ZG + percent_gdp
                             + fuel_income_dependence + VAT.Rate + Crisis.Year
                             + as.factor(country2)
                             + as.factor(country2)*time_trend_country, 
                             data=censdat,
                             clusters = country2)

# Summaries for Table 1
texreg(list(fit.lfe1.all, fit.cfe1.all, fit.lead.traits.all, fit.crisis1.all), 
       digits = 4, omit.coef = c('as.factor(obsid)*|as.factor(country2)*'), include.ci = FALSE)